Skip to content

Instantly share code, notes, and snippets.

@timarnold
Last active March 30, 2020 15:28
Show Gist options
  • Save timarnold/8f1ab3e2b87c54e3ce73b9c99a5bc35f to your computer and use it in GitHub Desktop.
Save timarnold/8f1ab3e2b87c54e3ce73b9c99a5bc35f to your computer and use it in GitHub Desktop.
import pandas as pd
import matplotlib.pyplot as plt
import pylab as plot
import numpy as np
# Graph appearance parameters: matplotlib rcParams overrides, applied globally
# so every artist drawn afterwards picks up the larger fonts, lines and ticks.
params = {
    # Legend
    'legend.fontsize': 20,
    'legend.handlelength': 2,
    # Axes title and axis labels
    'axes.titlesize': 24,
    'axes.labelsize': 20,
    # Lines and markers
    'lines.linewidth': 3,
    'lines.markersize': 12,
    # Tick labels
    'xtick.labelsize': 18,
    'ytick.labelsize': 18,
    # Major tick marks
    'xtick.major.size': 12,
    'xtick.major.width': 3,
    'ytick.major.size': 12,
    'ytick.major.width': 3,
    # Minor tick marks (y axis only)
    'ytick.minor.size': 6,
    'ytick.minor.width': 1.5,
}
plot.rcParams.update(params)
# Fetch data
# Download the county-level CSV from the NYT covid-19-data repository and keep
# only the columns used below (the 'fips' and 'deaths' columns are discarded).
df = pd.read_csv(
    'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv',
    parse_dates=[0],  # parse the first column as dates
    index_col=False,
).drop(columns=['fips', 'deaths'])
# Snapshot of the county-level rows; `df` itself is re-aggregated further down.
df_orig = df.copy()
# Case-count threshold: each series is plotted only from where it exceeds this.
n_since = 100
# Single shared axes that every series below is drawn onto.
fig, ax = plt.subplots(figsize=(18, 10))
# Plot aggregate USA
# Sum the county-level case counts into one national total per date.
# groupby(...).sum() is used because DataFrame.sum(level=...) was deprecated
# in pandas 1.3 and removed in pandas 2.0.
_df_us = df_orig.groupby('date', as_index=False)['cases'].sum()
# Keep only dates above the threshold. The copy makes this an independent
# frame, so adding a column below does not write into a filtered slice.
_df_us = _df_us[_df_us.cases > n_since].copy()
# Day 0 is the first date on which the national total exceeded n_since.
_us_dates = pd.to_datetime(_df_us.date)
_df_us['days_since'] = (_us_dates - _us_dates.min()).dt.days
_df_us.plot(x='days_since', y='cases', ax=ax, label='USA', logy=True, marker='o', color='grey')
# Plot growth lines
# Dashed reference curves that start at n_since on day 0 and compound daily at
# a fixed rate, spanning the same day range as the USA series.
x = np.arange(_df_us.days_since.max() + 1)
for r in [0.25, 0.35, 0.50]:
    # (1 + r) ** x grows by a fraction r per day, which is what the
    # "<r>% Growth" legend label states. The previous 2 ** (r * x) form
    # doubled every 1 / r days instead (roughly 19%, 27% and 41% per day
    # for these three rates), so the labels did not match the curves.
    # round() guards the %d conversion against float truncation of 100 * r.
    ax.plot(
        x,
        n_since * np.power(1 + r, x),
        linestyle='--',
        label="%d%% Growth" % round(100 * r),
        color='grey',
    )
# Plot states
# States to draw, built once outside the loop. The original list named
# 'Pennsylvania' twice and was rebuilt on every iteration.
states_to_plot = frozenset([
    'Ohio',
    'Pennsylvania',
    'New York',
    'Washington',
    'California',
    'New Jersey',
])
# Collapse counties into one total per (date, state).
# groupby(...).sum() is used because DataFrame.sum(level=...) was deprecated
# in pandas 1.3 and removed in pandas 2.0.
df = df.groupby(['date', 'state'], as_index=False)['cases'].sum()
df = df[df.cases > n_since]
for name, group in df.groupby('state'):
    if name not in states_to_plot:
        continue
    _g = group.copy()
    # Day 0 is the first date on which this state exceeded n_since cases.
    _state_dates = pd.to_datetime(_g.date)
    _g['days_since'] = (_state_dates - _state_dates.min()).dt.days
    _g.plot(x='days_since', y='cases', ax=ax, label=name, logy=True, marker='o')
# Plot Allegheny County, could generalize this to other counties if we wanted
# Select the county's rows above the threshold in one step. Copying only the
# filtered rows avoids duplicating the whole frame and makes the column
# assignment below operate on an independent frame.
_is_allegheny = (df_orig.state == 'Pennsylvania') & (df_orig.county == 'Allegheny')
_df = df_orig[_is_allegheny & (df_orig.cases > n_since)].copy()
# Day 0 is the first date on which the county exceeded n_since cases.
_county_dates = pd.to_datetime(_df.date)
_df['days_since'] = (_county_dates - _county_dates.min()).dt.days
# Legend label spelling corrected from 'Alleghey County'.
_df.plot(
    x='days_since',
    y='cases',
    ax=ax,
    label='Allegheny County',
    logy=True,
    marker='o',
    color='black',
    linewidth=6,
)
# Graph appearance
# `ax` is the only axes created in this script, so the Axes methods below act
# on the same axes that the pyplot state-machine calls would target.
ax.legend()
# One tick per day, running one day past the last USA data point.
ax.set_xticks(np.arange(_df_us.days_since.max() + 2))
ax.set_xlabel("Days since %d cases" % n_since)
ax.set_ylabel("Cases")
ax.grid()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment