Last active
March 30, 2020 15:28
-
-
Save timarnold/8f1ab3e2b87c54e3ce73b9c99a5bc35f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Plot cumulative COVID-19 case counts on a log scale, aligned by outbreak day.

The script downloads the New York Times county-level case data and draws,
on a single set of axes:

* the aggregate USA curve,
* dashed reference curves for fixed daily growth rates,
* curves for a selection of states, and
* the curve for Allegheny County, Pennsylvania.

Every curve is aligned so that day 0 is the first day on which that region
had more than ``n_since`` cumulative cases.
"""
import pandas as pd
import matplotlib.pyplot as plt
import pylab as plot
import numpy as np

# Graph appearance parameters
params = {
    'legend.fontsize': 20,
    'legend.handlelength': 2,
    'axes.titlesize': 24,
    'axes.labelsize': 20,
    'lines.linewidth': 3,
    'lines.markersize': 12,
    'xtick.labelsize': 18,
    'ytick.labelsize': 18,
    'xtick.major.size': 12,
    'xtick.major.width': 3,
    'ytick.major.size': 12,
    'ytick.major.width': 3,
    'ytick.minor.size': 6,
    'ytick.minor.width': 1.5,
}
plot.rcParams.update(params)


def _add_days_since(frame):
    """Return a copy of *frame* with an integer ``days_since`` column.

    ``days_since`` is the number of whole days between each row's ``date``
    and the earliest ``date`` present in *frame*.
    """
    out = frame.copy()
    dates = pd.to_datetime(out['date'])
    out['days_since'] = (dates - dates.min()).dt.days
    return out


# Fetch data
df = pd.read_csv(
    'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv',
    parse_dates=[0],
    index_col=False,
)
df = df.drop(columns=['fips', 'deaths'])

# Curves start on the first day a region exceeds this many cumulative cases.
n_since = 100

fig, ax = plt.subplots(figsize=(18, 10))
df_orig = df.copy()

# Plot aggregate USA
# groupby(...).sum() replaces DataFrame.sum(level=...), which was removed in
# pandas 2.0.
_df_us = df_orig.groupby('date', as_index=False)['cases'].sum()
_df_us = _df_us[_df_us.cases > n_since]
_df_us = _add_days_since(_df_us)
_df_us.plot(x='days_since', y='cases', ax=ax, label='USA', logy=True,
            marker='o', color='grey')

# Plot growth lines
x = np.arange(_df_us.days_since.max() + 1)
for r in [0.25, 0.35, 0.50]:
    # Compound daily growth at rate r: the count is multiplied by (1 + r)
    # each day. The earlier form, 2 ** (r * x), is a doubling every 1 / r
    # days (about 19% per day for r = 0.25) and did not match the
    # "r% Growth" label.
    ax.plot(x, n_since * np.power(1 + r, x), linestyle='--',
            label="%d%% Growth" % round(100 * r), color='grey')

# Plot states
states_to_plot = [
    'Ohio',
    'Pennsylvania',
    'New York',
    'Washington',
    'California',
    'New Jersey',
]
df = df.groupby(['date', 'state'], as_index=False)['cases'].sum()
df = df[df.cases > n_since]
# groupby iterates states in sorted order, which fixes the colour assignment.
for name, group in df[df.state.isin(states_to_plot)].groupby('state'):
    _g = _add_days_since(group)
    _g.plot(x='days_since', y='cases', ax=ax, label=name, logy=True,
            marker='o')

# Plot Allegheny County, could generalize this to other counties if we wanted
_df = df_orig[(df_orig.state == 'Pennsylvania') & (df_orig.county == 'Allegheny')]
_df = _df[_df.cases > n_since]
_df = _add_days_since(_df)
_df.plot(x='days_since', y='cases', ax=ax, label='Allegheny County',
         logy=True, marker='o', color='black', linewidth=6)

# Graph appearance
plt.legend()
plt.xticks(np.arange(0, _df_us.days_since.max() + 2, step=1))
plt.xlabel("Days since %d cases" % n_since)
plt.ylabel("Cases")
plt.grid()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment