Created
April 12, 2020 10:40
-
-
Save mtavkhelidze/4cb4396b1337b47af717bdefe5098efc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import pandas as pd | |
from pandas import DataFrame | |
from pandas.io.common import urlopen | |
import numpy as np | |
# Default endpoint that get_data() downloads from.
URL = "https://covidtracking.com/api/v1/states/daily.json"
# Default smoothing factor for the EMA columns; used by get_data() to build
# them and by create_fig() to look them up again by name.
ALPHA = 0.1
# Default for create_fig()'s ``log`` argument; also selects the "-log"
# suffix of the saved figure's file name.
LOGARITHMIC = False
def fetch_data(*, url: str) -> DataFrame:
    """Download the JSON document at *url* and parse it into a DataFrame.

    Args:
        url: Address of the JSON resource to fetch.

    Returns:
        The parsed frame. pandas is asked to attempt date conversion on the
        ``date`` and ``dateChecked`` columns.
    """
    # Use the response as a context manager so the connection is closed
    # deterministically rather than whenever the object is garbage-collected.
    with urlopen(url) as response:
        return pd.read_json(response, convert_dates=["date", "dateChecked"])
def calc_ema(*, col_name: str, d: DataFrame, alpha: float = 0.5) -> None:
    """Add an exponentially weighted moving average of one column to *d*.

    The new column is written in place under the name
    ``"<col_name>_ema_<alpha>"``; nothing is returned.

    Args:
        col_name: Name of the existing column to smooth.
        d: Frame that is read from and extended with the new column.
        alpha: Smoothing factor passed to ``Series.ewm``.
    """
    source = d[col_name]
    smoothed = source.ewm(alpha=alpha, adjust=True).mean()
    d[f"{col_name}_ema_{alpha}"] = smoothed
# noinspection PyShadowingNames
def get_data(*, url=URL, alpha=ALPHA) -> DataFrame:
    """Fetch the daily feed and reduce it to per-date totals with EMA columns.

    Rows are grouped by their ``date`` value (parsed as ``YYYYMMDD``) and the
    numeric columns are summed within each date. An exponential moving
    average column named ``"<column>_ema_<alpha>"`` is then added for each of
    ``death``, ``positive``, ``negative``, ``recovered``, ``hospitalized``
    and ``inIcuCurrently``.

    Args:
        url: Address of the JSON feed to download.
        alpha: Smoothing factor used for every EMA column.

    Returns:
        A frame indexed by date holding the summed numeric columns plus the
        EMA columns.
    """
    df = fetch_data(url=url)
    df["date"] = pd.to_datetime(df["date"], format="%Y%m%d")
    # Sum numeric columns only. The feed presumably also carries non-numeric
    # columns (e.g. the date-converted ``dateChecked``); summing those is
    # meaningless and raises TypeError for datetime columns on pandas >= 2.0,
    # where ``numeric_only`` no longer silently drops them.
    df = df.groupby(by="date").sum(numeric_only=True)
    for col_name in (
        "death",
        "positive",
        "negative",
        "recovered",
        "hospitalized",
        "inIcuCurrently",
    ):
        calc_ema(col_name=col_name, d=df, alpha=alpha)
    return df
# noinspection PyShadowingNames
def create_fig(*, df: DataFrame, log=LOGARITHMIC, alpha=ALPHA):
    """Draw the test-result bars and the EMA curves on the current figure.

    All drawing goes through the pyplot state machine, so the caller is
    responsible for saving or showing the figure afterwards.

    Args:
        df: Frame indexed by date that contains ``totalTestResults`` and the
            ``"<column>_ema_<alpha>"`` columns plotted below.
        log: When true, the bars use a logarithmic scale and the title and
            y-axis label say so.
        alpha: Smoothing factor used to locate the EMA columns and shown in
            the legend labels.
    """
    log_note = " (logarithmic)" if log else ""
    # (column prefix, legend title, line colour), listed in drawing order.
    # NOTE: the "recovered" EMA series is not plotted here.
    ema_curves = (
        ("positive", "Positive", "blue"),
        ("negative", "Negative", "green"),
        ("death", "Deaths", "red"),
        ("hospitalized", "In Hospitals", "yellow"),
        ("inIcuCurrently", "In ICU", "purple"),
    )

    plt.grid(True)
    plt.bar(
        df.index,
        df["totalTestResults"],
        label="Total Test Results",
        color="tab:grey",
        log=log,
    )
    for column, title, colour in ema_curves:
        plt.plot(
            df[f"{column}_ema_{alpha}"],
            label=f"{title} EMA α={alpha}",
            color=colour,
            lw=3,
        )

    # The last index entry is used as the "as of" date in the title.
    last_day = df.index[-1].strftime("%d %b, %Y")
    plt.suptitle(f"The U.S. COVID-19 Stats as of {last_day}{log_note}")

    axes = plt.gca()
    axes.set_ylabel(f"Number of people{log_note}")
    axes.set_xlabel("Days")

    # Source credit, anchored to the right edge of the axes and offset
    # 10 points up from the bottom of the figure.
    plt.annotate(
        "Source: covidtracking.com",
        xy=(1, 0),
        xycoords=("axes fraction", "figure fraction"),
        xytext=(0, 10),
        textcoords="offset points",
        ha="right",
        va="bottom",
    )
    plt.legend(loc="upper left")
    plt.gcf().set_size_inches([12.8, 9.6])
    # Leave a small margin at the top and bottom for the suptitle and the
    # source annotation.
    plt.tight_layout(rect=[0, 0.03, 1, 0.97])
def calc_cors(*, d: DataFrame, method="pearson"):
    """Compute a fixed set of pairwise column correlations.

    Args:
        d: Frame containing the ``positive``, ``death``, ``negative`` and
            ``totalTestResults`` columns.
        method: Correlation method forwarded to ``Series.corr``.

    Returns:
        A dict mapping a ``"Left/Right"`` label to the correlation of the two
        corresponding columns.
    """
    # (result label, left column, right column)
    pairs = (
        ("Positive/Death", "positive", "death"),
        ("Positive/Negative", "positive", "negative"),
        ("Total/Negative", "totalTestResults", "negative"),
        ("Total/Positive", "totalTestResults", "positive"),
    )
    return {
        label: d[left].corr(d[right], method=method)
        for label, left, right in pairs
    }
# Script body: download and aggregate the data, print the raw totals and
# their correlations, then render the chart and save it to disk.
df = get_data()
print(
    df.loc[
        :,
        [
            "totalTestResults",
            "positive",
            "negative",
            "death",
            "hospitalized",
            "inIcuCurrently",
            "recovered",
        ],
    ]
)

corr_method = "pearson"
print(f"Correlations ({corr_method.capitalize()})")
for name, value in calc_cors(d=df, method=corr_method).items():
    print(f" {name}: {np.round(value, 4)}")

create_fig(df=df, log=LOGARITHMIC)
# No extension is given, so matplotlib picks its default output format.
plt.savefig("us-stats-log" if LOGARITHMIC else "us-stats")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment