Created
March 31, 2020 12:27
-
-
Save timarnold/5ce063fe71ed2019dfd05f8761394785 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import matplotlib.pyplot as plt | |
import pylab as plot | |
import numpy as np | |
from scipy.optimize import curve_fit | |
def func(x, a, b):
    """Exponential model ``a * b ** x`` used as the curve-fit target.

    Parameters:
        x: scalar or array-like exponent(s).
        a: scale factor, i.e. the model value at ``x == 0``.
        b: base of the exponential (growth factor per unit of ``x``).

    Returns:
        ``a * b ** x`` as a NumPy float (scalar or array, following ``x``).
    """
    # Promote the base to float first: NumPy raises ValueError for an integer
    # base raised to a negative integer power (e.g. np.power(2, -1)).
    base = np.asarray(b, dtype=float)
    return a * np.power(base, x)
# Graph styling: larger fonts, lines, markers and ticks than the matplotlib
# defaults, applied globally through rcParams.
params = {
    # Legend
    'legend.fontsize': 20,
    'legend.handlelength': 2,
    # Axes title and axis labels
    'axes.titlesize': 24,
    'axes.labelsize': 20,
    # Lines and markers
    'lines.linewidth': 3,
    'lines.markersize': 12,
    # Tick labels
    'xtick.labelsize': 18,
    'ytick.labelsize': 18,
    # Major tick marks
    'xtick.major.size': 12,
    'xtick.major.width': 3,
    'ytick.major.size': 12,
    'ytick.major.width': 3,
    # Minor tick marks (y axis only)
    'ytick.minor.size': 6,
    'ytick.minor.width': 1.5,
}
plt.rcParams.update(params)
# Fetch data: the us-counties.csv file from the nytimes/covid-19-data repo.
df = pd.read_csv(
    'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv',
    index_col=False,
    parse_dates=[0],  # parse the first column as dates
)
# Only case counts are used below, so discard the other two columns.
df = df.drop(['fips', 'deaths'], axis=1)

# Settings
n_since = 100          # case-count threshold that defines "day 0" of each curve
plot_usa = False       # draw the national aggregate
plot_states = False    # draw the selected states
plot_counties = True   # draw the selected counties and their fits

# Show the date of the last row in the downloaded file.
print(df['date'].iloc[-1])

fig, ax = plt.subplots(figsize=(18, 10))

# Keep an untouched copy: `df` may be re-bound to a state-level aggregate later.
df_orig = df.copy()
# Plot aggregate USA
# Sum all county rows into one national case count per date.
# (DataFrame.sum(level=...) was removed in pandas 2.0; groupby is the
# supported equivalent.)
_df_us = df_orig.groupby('date', as_index=False)['cases'].sum()

# Keep only dates above the threshold; copy so the column assignment below
# writes to an independent frame rather than a filtered slice.
_df_us = _df_us[_df_us.cases > n_since].copy()

# Whole days elapsed since the first date above the threshold.
_us_dates = pd.to_datetime(_df_us.date)
_df_us['days_since'] = (_us_dates - _us_dates.min()).dt.days

if plot_usa:
    _df_us.plot(x='days_since', y='cases', ax=ax, label='USA', logy=True,
                marker='o', color='lightgrey')

# Day grid on which fitted curves are evaluated: 0 .. (last national day + 1).
all_x = np.arange(0, _df_us.days_since.max() + 2, step=1)
# Plot growth lines
# Dashed grey reference curves n_since * 2 ** (r * day): each starts at the
# threshold and doubles every 1 / r days (r = 0.25 -> every 4 days).
x = np.arange(_df_us.days_since.max() + 1)
for r in (0.25, 0.35, 0.50):
    reference = n_since * 2 ** (r * x)
    ax.plot(x, reference, color='lightgrey', linestyle='--')
# Plot states
if plot_states:
    # Collapse county rows into one case count per (date, state).
    # (DataFrame.sum(level=...) was removed in pandas 2.0; groupby is the
    # supported equivalent.)
    df = df.groupby(['date', 'state'], as_index=False)['cases'].sum()
    df = df[df.cases > n_since]

    # States to draw (the original list named Pennsylvania twice).
    states_to_plot = [
        'Ohio',
        'Pennsylvania',
        'New York',
        'Washington',
        'California',
        'New Jersey',
        'Florida',
        'Michigan',
    ]

    # Filter once up front instead of testing membership for every group.
    selected = df[df.state.isin(states_to_plot)]
    for name, group in selected.groupby('state'):
        _g = group.copy()
        # Whole days since this state first exceeded the threshold.
        _state_dates = pd.to_datetime(_g.date)
        _g['days_since'] = (_state_dates - _state_dates.min()).dt.days
        _g.plot(x='days_since', y='cases', ax=ax, label=name, logy=True, marker='o')
# Plot selected counties, each with an exponential fit through its most
# recent data points extrapolated over the full day grid.
if plot_counties:
    n = 3  # number of most recent data points used for each fit
    for state, county in [
        ('Pennsylvania', 'Allegheny'),
        ('Pennsylvania', 'Philadelphia'),
        ('New York', 'New York City'),
        ('Washington', 'King'),
        ('Ohio', 'Franklin'),
        ('Michigan', 'Wayne'),
    ]:
        # Select this county's rows first, then copy only that small slice
        # (previously the whole data set was copied on every iteration).
        _df = df_orig[(df_orig.state == state) & (df_orig.county == county)]
        _df = _df[_df.cases > n_since].copy()

        # The model has two free parameters, so curve_fit needs at least two
        # points; skip counties that have not been above the threshold long
        # enough instead of aborting the whole figure.
        if len(_df) < 2:
            print("Skipping %s, %s: fewer than 2 rows above %d cases"
                  % (county, state, n_since))
            continue

        # Whole days since this county first exceeded the threshold.
        _county_dates = pd.to_datetime(_df.date)
        _df['days_since'] = (_county_dates - _county_dates.min()).dt.days

        # Fit cases = a * b ** day to the last n points; popt is (a, b).
        popt, pcov = curve_fit(func, _df.days_since.values[-n:], _df.cases.values[-n:])

        # Draw the fitted curve first and reuse its colour for the data so
        # each county's fit and markers match.
        fit_line, = ax.semilogy(all_x, func(all_x, *popt), linestyle='--')

        # Legend shows the fitted growth per day as a percentage: (b - 1) * 100.
        growth_pct = (popt[1] - 1) * 100
        _df.plot(x='days_since', y='cases', ax=ax,
                 label="%s, %.1f%%" % (county, growth_pct),
                 logy=True, marker='o', color=fit_line.get_color())
# Graph appearance
ax.legend()
# One tick per day, up to one day past the last national data point.
ax.set_xticks(np.arange(0, _df_us.days_since.max() + 2, step=1))
# Fixed viewing window; set after the ticks so these limits take effect.
ax.set_xlim(0, 25)
ax.set_ylim(100, 100000)
ax.set_xlabel("Days since %d cases" % n_since)
ax.set_ylabel("Cases")
ax.grid()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment