Skip to content

Instantly share code, notes, and snippets.

@dylanjmcconnell
Created March 12, 2020 05:48
Show Gist options
  • Save dylanjmcconnell/4cb16eaf54037d6357fca6a9bbd73cb6 to your computer and use it in GitHub Desktop.
Save dylanjmcconnell/4cb16eaf54037d6357fca6a9bbd73cb6 to your computer and use it in GitHub Desktop.
Gets the latest data from the Johns Hopkins COVID-19 tracker, via GitHub, via Nick
# This script gets the latest data from the Johns Hopkins GitHub repo https://github.com/CSSEGISandData
# It loads the data into a data frame (without saving it) and parses the dates; the result is in long form by default.
# Modified from Nick Evershed's version https://gist.github.com/nickjevershed
import requests
import pandas as pd
import datetime
from io import BytesIO
def getData(_long=True):
    """Download the CSSE COVID-19 time series and combine them in one frame.

    Each file listed below is fetched through the GitHub contents API, read
    as CSV, tagged with an ``Observation`` column naming its series, and
    concatenated with the others. Nothing is written to disk.

    Args:
        _long: When true (the default), the date columns are melted into
            rows, with the date stored in a ``Date`` column. When false, the
            wide frame (one column per date) is returned.

    Returns:
        A pandas DataFrame in long or wide form, depending on ``_long``.

    Raises:
        requests.HTTPError: If any download returns an error status.
        requests.Timeout: If the server does not respond in time.
    """
    # Observation type -> file name inside the time-series directory.
    files = {"confirmed" : "time_series_19-covid-Confirmed.csv",
             "deaths" : "time_series_19-covid-Deaths.csv",
             "recovered": "time_series_19-covid-Recovered.csv"}
    # Media type asking the contents API for the raw file body.
    headers = {'Accept': 'application/vnd.github.v3.raw'}
    url_template = "https://api.github.com/repos/CSSEGISandData/COVID-19/contents/csse_covid_19_data/csse_covid_19_time_series/{path}"

    df_list = []
    # Dict used as an ordered set: collects every date seen in any file, so
    # that a date present in only one series is still melted.
    date_columns = {}
    id_columns = []
    for obs, path in files.items():
        url = url_template.format(path=path)
        r = requests.get(url, headers=headers, timeout=30)
        # Fail loudly instead of feeding an error body to the CSV parser.
        r.raise_for_status()
        df = pd.read_csv(BytesIO(r.content))
        # The first four columns are treated as identifiers; the remainder
        # are date columns, which parse_dates renames in place.
        id_columns = list(df.columns[:4])
        date_columns.update(dict.fromkeys(parse_dates(df)))
        df['Observation'] = obs
        df_list.append(df)

    dx = pd.concat(df_list)
    if not _long:
        return dx
    variables = id_columns + ["Observation"]
    return dx.melt(value_vars=list(date_columns), id_vars=variables, var_name="Date")
def parse_dates(df):
    """Rename the date columns of ``df`` from strings to ``datetime.date``.

    Every column from the fifth onwards is taken to be a date header written
    as month/day/two-digit-year (e.g. ``1/22/20``). The frame is modified in
    place.

    Args:
        df: DataFrame whose columns after the first four are date strings.

    Returns:
        A list of the parsed ``datetime.date`` objects, in column order.

    Raises:
        ValueError: If a header does not match the ``%m/%d/%y`` format.
    """
    # Parse each header itself; parsing a fixed literal here would map every
    # column to the same date and produce duplicate column names.
    names = {date: datetime.datetime.strptime(date, "%m/%d/%y").date() for date in df.columns[4:]}
    df.rename(columns=names, inplace=True)
    return list(names.values())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment