Skip to content

Instantly share code, notes, and snippets.

@timarnold
Created April 13, 2020 21:43
Show Gist options
  • Save timarnold/104952f8bcb1b63ee2556ae4c68f5db9 to your computer and use it in GitHub Desktop.
Save timarnold/104952f8bcb1b63ee2556ae4c68f5db9 to your computer and use it in GitHub Desktop.
import pandas as pd
import matplotlib.pyplot as plt
import pylab as plot
import numpy as np
from scipy.optimize import curve_fit
def func(x, a, b):
    """Exponential growth model ``a * b ** x``.

    Used as the model function for ``curve_fit``: ``a`` is the value at
    ``x == 0`` and ``b`` is the growth factor per unit of ``x`` (so
    ``(b - 1) * 100`` is the percentage growth per step).

    Args:
        x: Scalar or array-like of positions.
        a: Scale factor (value at ``x == 0``).
        b: Base of the exponential.

    Returns:
        ``a * b ** x``, computed element-wise with NumPy.
    """
    return a * np.power(b, x)
def logistic(x, L, k, x0):
    """Logistic (sigmoid) curve ``L / (1 + exp(-k * (x - x0)))``.

    Args:
        x: Scalar or NumPy array of positions.
        L: Scale of the curve; the value at ``x == x0`` is ``L / 2``.
        k: Steepness of the curve.
        x0: Midpoint position.

    Returns:
        The curve evaluated at ``x`` (element-wise for arrays).
    """
    return L / (1 + np.exp(-k * (x - x0)))
# Matplotlib rc overrides applied to every figure created below:
# larger fonts, thicker lines, bigger markers and ticks.
params = {
    # Legend
    'legend.fontsize': 20,
    'legend.handlelength': 2,

    # Axes titles and labels
    'axes.titlesize': 24,
    'axes.labelsize': 20,

    # Plotted lines and markers
    'lines.linewidth': 3,
    'lines.markersize': 12,

    # X ticks
    'xtick.labelsize': 18,
    'xtick.major.size': 12,
    'xtick.major.width': 3,

    # Y ticks (minor ticks are styled as well)
    'ytick.labelsize': 18,
    'ytick.major.size': 12,
    'ytick.major.width': 3,
    'ytick.minor.size': 6,
    'ytick.minor.width': 1.5,
}
plot.rcParams.update(params)
# Threshold: every series below is trimmed to the rows where the case count
# exceeds this value, and "day 0" is the first such row.
n_since = 30

# County-level case data; column 0 is parsed as the date.
df = pd.read_csv(
    'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv',
    parse_dates=[0],
    index_col=False,
)
df = df.drop(columns=['fips', 'deaths'])

# Pristine copy that the per-figure sections filter from.
df_orig = df.copy()

# National total per date. DataFrame.sum(level=...) no longer exists in
# pandas 2.x, so the aggregation is expressed with groupby instead.
_df_us = df_orig.groupby('date', as_index=False)['cases'].sum()

# .copy() so that adding a column does not write into a filtered view.
_df_us = _df_us[_df_us.cases > n_since].copy()
_df_us['days_since'] = (_df_us.date - _df_us.date.min()).dt.days

# Integer day axis used when drawing fitted curves; extends one day past
# the last national data point.
all_x = np.arange(0, _df_us.days_since.max() + 2, step=1)
# Figure toggles. Only plot_usa is read by the visible code below.
plot_states = True
plot_counties = False
plot_usa = False

# State populations used as the divisor for 'cases_per_million'
# (presumably in millions of residents - TODO confirm source and year).
# NOTE(review): 'New York' was 8.623, which matches New York City rather than
# the state; the case data is summed over the whole state, so the statewide
# figure is used here. Confirm against the same source as the other values.
populations = {
    'Ohio': 11.69,
    'Pennsylvania': 12.81,
    'New York': 19.54,
    'Washington': 7.536,
    'California': 39.56,
    'New Jersey': 8.909,
    'Florida': 21.3,
    'Michigan': 9.996,
    'Louisiana': 4.66,
}
# Date of the last row in the data set.
print(df_orig.date.iloc[-1])

# Figure: case counts per state on a log axis, aligned on the day each
# state first exceeded n_since cases.
fig, ax = plt.subplots(figsize=(18, 10))

if plot_usa:
    _df_us.plot(x='days_since', y='cases', ax=ax, label='USA', logy=True, marker='o', color='lightgrey')

# Reference growth lines: n_since * 2 ** (r * day), i.e. r doublings per day.
x = np.arange(_df_us.days_since.max() + 1)
for r in [0.25, 0.35, 0.50]:
    ax.plot(x, n_since * np.power(2, r * x), linestyle='--', color='lightgrey')

# Per-state totals per date. DataFrame.sum(level=...) no longer exists in
# pandas 2.x, so the aggregation is expressed with groupby instead.
df = df_orig.groupby(['date', 'state'], as_index=False)['cases'].sum()

for state, group in df.groupby('state'):
    # Only states with a population entry are plotted; this also guarantees
    # the populations[state] lookup below cannot fail.
    if state not in populations:
        continue
    _g = group.copy()
    # Computed for the optional per-capita view; the plot below uses raw cases.
    _g['cases_per_million'] = _g.cases / populations[state]
    # .copy() so that adding a column does not write into a filtered view.
    _g = _g[_g.cases > n_since].copy()
    _g['days_since'] = (_g.date - _g.date.min()).dt.days
    _g.plot(x='days_since', y='cases', ax=ax, label=state, logy=True, marker='o')

# Graph appearance
plt.legend()
plt.xticks(np.arange(0, _df_us.days_since.max() + 2, step=1))
ax.set_xlim(0, 45)
ax.set_ylim(10, 250000)
plt.xlabel("Days since %d cases" % n_since)
plt.ylabel("Cases")
plt.grid()
# Figure: case counts for selected counties on a log axis, each with an
# exponential curve fitted to its most recent points.
fig, ax = plt.subplots(figsize=(18, 10))

# Reference growth lines: n_since * 2 ** (r * day), i.e. r doublings per day.
x = np.arange(_df_us.days_since.max() + 1)
for r in [0.25, 0.35, 0.50]:
    ax.plot(x, n_since * np.power(2, r * x), linestyle='--', color='lightgrey')

# Number of most recent data points used for each exponential fit.
n = 3

for state, county in [
    ('Pennsylvania', 'Philadelphia'),
    # ('New York', 'New York City'),
    # ('Washington', 'King'),
    # ('Ohio', 'Franklin'),
    ('Pennsylvania', 'Allegheny'),
    ('Tennessee', 'Williamson'),
    ('Tennessee', 'Davidson'),
    ('Tennessee', 'Shelby'),
    ('Tennessee', 'Knox'),
    # ('Michigan', 'Wayne')
]:
    # Filter first, then copy only the matching rows.
    _df = df_orig[(df_orig.state == state) & (df_orig.county == county)]
    _df = _df[_df.cases > n_since].copy()

    # func has two free parameters, so curve_fit needs at least two points.
    if len(_df) < 2:
        print("%s, %s: not enough data above %d cases, skipped" % (county, state, n_since))
        continue

    _df['days_since'] = (_df.date - _df.date.min()).dt.days

    popt, pcov = curve_fit(func, _df.days_since.tolist()[-n:], _df.cases.tolist()[-n:])
    fit_line, = ax.semilogy(all_x, func(all_x, *popt), linestyle='--')

    # popt[1] is the fitted growth factor per day; the label shows it as a
    # percentage. The data reuses the colour of its fitted curve.
    _df.plot(x='days_since', y='cases', ax=ax, label=county + (", %.1f%%" % ((popt[1] - 1) * 100)), logy=True, marker='o', color=fit_line.get_color())

# Graph appearance
plt.legend()
plt.xticks(np.arange(0, 34, step=1))
ax.set_xlim(0, 34)
ax.set_ylim(10, 100000)
plt.xlabel("Days since %d cases" % n_since)
plt.ylabel("Cases")
plt.grid()
# Figure: smoothed day-over-day percentage change in cases per county.
fig, ax = plt.subplots(figsize=(18, 10))

# True: x axis is days since the county exceeded n_since cases.
# False: x axis is the calendar date.
days_since = True

for state, county in [
    ('Pennsylvania', 'Allegheny'),
    ('Pennsylvania', 'Philadelphia'),
    ('Tennessee', 'Williamson'),
    ('Tennessee', 'Davidson'),
    ('Tennessee', 'Shelby'),
    ('Tennessee', 'Knox'),
    # ('New York', 'New York City'),
    # ('Washington', 'King'),
    # ('Ohio', 'Franklin')
]:
    # Filter first, then copy only the matching rows.
    _df = df_orig[(df_orig.state == state) & (df_orig.county == county)]
    _df = _df[_df.cases > n_since].copy()
    if _df.empty:
        print("%s, %s: no data above %d cases, skipped" % (county, state, n_since))
        continue
    _df['days_since'] = (_df.date - _df.date.min()).dt.days

    if days_since:
        _df = _df.drop(columns=['state', 'county', 'date'])
        _df['pct_change'] = _df.cases.pct_change(periods=1) * 100
        # The 5-row rolling mean is taken over every column, including
        # days_since, so each smoothed value is drawn at the mean day of
        # its window rather than at the window's last day.
        _df.rolling(5).mean().plot(x='days_since', y='pct_change', ax=ax, label=county, logy=False, marker='o')
    else:
        _df = _df.drop(columns=['state', 'county', 'days_since'])
        _df = _df.set_index(['date'])
        _df['pct_change'] = _df.cases.pct_change(periods=1) * 100
        _df = _df.rolling(3).mean()
        # The date index is kept so that it is used as the x axis.
        _df.plot(y='pct_change', ax=ax, label=county, logy=False, marker='o')

# Graph appearance
plt.legend()
plt.xticks(np.arange(0, 32, step=1))
ax.set_xlim(0, 28)
ax.set_ylim(0, 60)
plt.xlabel("Days since %d cases" % n_since)
plt.ylabel("Growth")
plt.grid()
# Figure: total cases per county on a linear axis, each with a logistic
# curve fitted to the whole series (starting at the first reported case).
fig, ax = plt.subplots(figsize=(18, 10))

for state, county in [
    ('Pennsylvania', 'Allegheny'),
    # ('Pennsylvania', 'Philadelphia'),
    ('Ohio', 'Franklin'),
    ('Tennessee', 'Williamson'),
    ('Tennessee', 'Davidson'),
    ('Tennessee', 'Shelby'),
    ('Tennessee', 'Knox'),
    # ('New York', 'New York City')
]:
    # Filter first, then copy only the matching rows.
    _df = df_orig[(df_orig.state == state) & (df_orig.county == county)]
    _df = _df[_df.cases > 0].copy()

    # logistic has three free parameters, so curve_fit needs at least
    # three points.
    if len(_df) < 3:
        print("%s, %s: not enough data for a logistic fit, skipped" % (county, state))
        continue

    # Date of the first row with at least one case.
    print(_df.date.iloc[0])
    _df['days_since'] = (_df.date - _df.date.min()).dt.days

    try:
        popt, pcov = curve_fit(logistic, _df.days_since.tolist(), _df.cases.tolist(), p0=[1200, 0.2, 22])
    except RuntimeError as err:
        # curve_fit raises RuntimeError when the minimisation fails; still
        # show the data instead of aborting the remaining counties.
        print("%s, %s: logistic fit failed (%s)" % (county, state, err))
        _df.plot(x='days_since', y='cases', ax=ax, label=county, logy=False, marker='o')
        continue

    print(popt)
    fit_line, = ax.plot(all_x, logistic(all_x, *popt), linestyle='--')
    # The data reuses the colour of its fitted curve.
    _df.plot(x='days_since', y='cases', ax=ax, label=county, logy=False, marker='o', color=fit_line.get_color())

# Graph appearance
plt.xticks(np.arange(0, 45, step=1))
ax.set_xlim(0, 45)
# Series here start at the first case (cases > 0), not at n_since cases.
plt.xlabel("Days since first case")
plt.ylabel("Total Cases")
plt.grid()
# Figure: two groups of Pennsylvania counties on a log axis, one colour per
# group, aligned on the day each county first exceeded n_since cases.
fig, ax = plt.subplots(figsize=(18, 10))

# Reference growth lines: n_since * 2 ** (r * day), i.e. r doublings per day.
x = np.arange(_df_us.days_since.max() + 1)
for r in [0.25, 0.35, 0.50]:
    ax.plot(x, n_since * np.power(2, r * x), linestyle='--', color='lightgrey')

# (colour, counties) per group. A county listed in both groups would be
# drawn twice and appear twice in the legend, so Butler is listed only in
# the second group - NOTE(review): confirm that is the intended grouping.
county_groups = [
    ('red', [
        ('Pennsylvania', 'Philadelphia'),
        ('Pennsylvania', 'Montgomery'),
        ('Pennsylvania', 'Delaware'),
        ('Pennsylvania', 'Bucks'),
        ('Pennsylvania', 'Chester'),
    ]),
    ('blue', [
        ('Pennsylvania', 'Allegheny'),
        ('Pennsylvania', 'Beaver'),
        ('Pennsylvania', 'Butler'),
        ('Pennsylvania', 'Westmoreland'),
    ]),
]

for color, counties in county_groups:
    for state, county in counties:
        # Filter first, then copy only the matching rows.
        _df = df_orig[(df_orig.state == state) & (df_orig.county == county)]
        _df = _df[_df.cases > n_since].copy()
        if _df.empty:
            print("%s, %s: no data above %d cases, skipped" % (county, state, n_since))
            continue
        _df['days_since'] = (_df.date - _df.date.min()).dt.days
        _df.plot(x='days_since', y='cases', ax=ax, label=county, logy=True, marker='o', color=color)

# Graph appearance
plt.legend()
plt.xticks(np.arange(0, 22, step=1))
ax.set_xlim(0, 22)
ax.set_ylim(10, 10000)
plt.xlabel("Days since %d cases" % n_since)
plt.ylabel("Cases")
plt.grid()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment